In [1]:
import xgboost as xgb
from sklearn import model_selection
from sklearn import metrics

import numpy as np
import pandas as pd

import os
import pickle
import itertools
import random
import math
import time
from datetime import timedelta

%matplotlib inline
np.set_printoptions(precision=2, suppress=True)

Data Overview


In [2]:
DATA_DIRECTORY = "../data/topviewkinect/chi"
DATA_INFO = ""

In [3]:
all_features_csv = "{data_dir}/{data}features.csv".format(data_dir=DATA_DIRECTORY, data=DATA_INFO)
all_labels_csv = "{data_dir}/{data}labels.csv".format(data_dir=DATA_DIRECTORY, data=DATA_INFO)

all_features_df = pd.read_csv(all_features_csv)
all_labels_df = pd.read_csv(all_labels_csv)

In [4]:
all_features_df.shape, all_labels_df.shape


Out[4]:
((77024, 73), (77024, 4))

In [5]:
all_features_df.head()


Out[5]:
layer_area_0 layer_area_1 layer_area_2 layer_contours_0 layer_contours_1 layer_distance_0 layer_distance_1 layer_distance_2 layer_distance_3 layer_distance_4 ... interlayer_pos_16 interlayer_pos_17 extremities0 extreme_infrared_0 extreme_infrared_1 extreme_infrared_2 extreme_infrared_3 extreme_infrared_4 extreme_infrared_5 subject
0 0.297578 0.411765 0.290657 3.0 3.0 16.552900 26.683300 26.019199 26.683300 201.0 ... -26.0 -107.0 4.0 0.0 10.0 11.5 11.5 0.0 11.5 1.0
1 0.310345 0.419238 0.270417 3.0 3.0 16.401199 26.476400 26.019199 26.476400 191.5 ... -26.0 -105.0 5.0 0.5 9.0 11.0 1.0 0.5 11.0 1.0
2 0.318015 0.386029 0.295956 3.0 3.0 16.124500 26.248800 27.018499 26.248800 174.5 ... -26.0 -104.0 5.0 0.0 12.5 4.5 4.5 0.5 13.0 1.0
3 0.348399 0.384181 0.267420 3.0 3.0 16.401199 26.419701 26.476400 26.419701 164.0 ... -25.0 -103.0 5.0 0.0 6.0 4.5 0.0 0.0 7.0 1.0
4 0.356383 0.370567 0.273050 3.0 3.0 17.719999 27.459101 27.459101 27.459101 164.5 ... -26.0 -107.0 3.0 0.0 0.0 0.5 0.0 0.0 0.5 1.0

5 rows × 73 columns


In [6]:
all_labels_df.head()


Out[6]:
activity orientation orientation_accurate subject
0 0 130 -1 1
1 0 130 -1 1
2 0 120 -1 1
3 0 130 -1 1
4 0 150 -1 1

In [7]:
subjects_list = np.unique(all_labels_df["subject"])
subjects_list


Out[7]:
array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12], dtype=int64)

In [8]:
activities_list = np.unique(all_labels_df["activity"])
activities_list


Out[8]:
array([0, 1, 2, 3, 4, 5], dtype=int64)

Training


In [9]:
activity_samples_df = pd.DataFrame(0, index=np.arange(6), columns=np.arange(12))
for subject_id in range(1, 13):
    d = all_labels_df[all_labels_df["subject"] == subject_id]
    for activity_id in range(6):
        activity_samples_df.iloc[activity_id, subject_id - 1] = len(d[d["activity"] == activity_id])  # .ix is deprecated; use .iloc
activity_samples_df


Out[9]:
0 1 2 3 4 5 6 7 8 9 10 11
0 964 657 769 869 958 801 630 1085 716 2216 1902 1525
1 1408 1540 1447 1261 1458 1242 1341 1298 1102 2251 2414 2170
2 472 282 224 588 393 1243 432 766 915 496 810 1388
3 1011 343 931 774 1055 777 673 837 731 1694 1404 1478
4 994 853 782 866 740 1017 807 982 656 2344 1512 1456
5 740 481 796 737 963 858 694 763 613 1728 1488 2413

In [10]:
median_activity_samples = [int(np.median(activity_samples_df.iloc[i])) for i in range(6)]
median_activity_samples


Out[10]:
[913, 1427, 542, 884, 924, 779]
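
A quick arithmetic check (added for clarity; not from the original run): each subject contributes the per-activity median counts, so the balanced set should hold 913 + 1427 + 542 + 884 + 924 + 779 = 5469 rows per subject, i.e. 5469 * 12 = 65628 rows in total, which matches X_df.shape further below.

sum(median_activity_samples) * 12  # -> 65628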

In [11]:
def sample_data(data, n, random_state):
    """Resample a (subject, activity) slice to exactly n rows."""
    data_size = len(data)
    if data_size <= n:
        # Too few rows: upsample with replacement
        return data.sample(n=n, replace=True, random_state=random_state)
    else:
        # Too many rows: thin the data at a fixed stride, wrapping around
        # (possibly to a negative position, i.e. counting from the end)
        # whenever the position runs past the last row
        stride = math.ceil(data_size / n)
        positions = []
        i = 0
        while len(positions) < n:
            positions.append(i)
            i += stride
            if i + 1 > data_size:
                i = data_size - i + 1
        # DataFrame.append is deprecated; positional indexing is equivalent
        return data.iloc[positions].reset_index(drop=True)
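
As a quick illustration of both branches on a made-up toy frame (not part of the dataset): classes with at most n rows are upsampled with replacement, while larger classes are thinned at a fixed stride.

toy = pd.DataFrame({"v": range(10)})
sample_data(toy, n=4, random_state=42)          # strided downsample: rows 0, 3, 6, 9
sample_data(toy.head(3), n=4, random_state=42)  # upsample 3 rows to 4 with replacement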

In [12]:
X_parts = []
y_parts = []

for subject_id in subjects_list:
    print("Subject", subject_id)
    for activity_id in activities_list:
        subject_df = all_labels_df[all_labels_df["subject"] == subject_id]
        subject_activity_df = subject_df[subject_df["activity"] == activity_id]
        data_indices = subject_activity_df.index
        
        subject_activity_y = subject_activity_df["activity"].values
        subject_activity_X = all_features_df.iloc[data_indices]
        subject_activity_X = subject_activity_X.assign(activity=subject_activity_y)
        
        # Balance each (subject, activity) cell to the per-activity median count
        num_samples = median_activity_samples[activity_id]
        subject_activity_X = sample_data(data=subject_activity_X, n=num_samples, random_state=42)
        subject_activity_y = pd.DataFrame({
            "subject": subject_activity_X["subject"], 
            "activity": subject_activity_X["activity"]
        })
        subject_activity_X = subject_activity_X.drop(labels="activity", axis=1)
        
        X_parts.append(subject_activity_X)
        y_parts.append(subject_activity_y)

# DataFrame.append is deprecated; concatenate all pieces once at the end
X_df = pd.concat(X_parts, ignore_index=True)
y_df = pd.concat(y_parts, ignore_index=True)


Subject 1
Subject 2
Subject 3
Subject 4
Subject 5
Subject 6
Subject 7
Subject 8
Subject 9
Subject 10
Subject 11
Subject 12

In [13]:
X_df.head()


Out[13]:
layer_area_0 layer_area_1 layer_area_2 layer_contours_0 layer_contours_1 layer_distance_0 layer_distance_1 layer_distance_2 layer_distance_3 layer_distance_4 ... interlayer_pos_16 interlayer_pos_17 extremities0 extreme_infrared_0 extreme_infrared_1 extreme_infrared_2 extreme_infrared_3 extreme_infrared_4 extreme_infrared_5 subject
0 0.297578 0.411765 0.290657 3.0 3.0 16.552900 26.683300 26.019199 26.683300 201.0 ... -26.0 -107.0 4.0 0.0 10.0 11.5 11.5 0.0 11.5 1.0
1 0.318015 0.386029 0.295956 3.0 3.0 16.124500 26.248800 27.018499 26.248800 174.5 ... -26.0 -104.0 5.0 0.0 12.5 4.5 4.5 0.5 13.0 1.0
2 0.356383 0.370567 0.273050 3.0 3.0 17.719999 27.459101 27.459101 27.459101 164.5 ... -26.0 -107.0 3.0 0.0 0.0 0.5 0.0 0.0 0.5 1.0
3 0.407066 0.351767 0.241167 3.0 3.0 20.615499 31.400600 28.600700 25.709900 156.5 ... -30.0 -110.0 5.0 0.0 3.5 3.5 19.0 4.0 19.5 1.0
4 0.466003 0.388060 0.145937 3.0 3.0 22.203600 30.805799 21.023800 30.805799 187.0 ... -32.0 -110.0 4.0 8.0 4.0 2.0 5.5 0.0 8.0 1.0

5 rows × 73 columns


In [14]:
y_df.head()


Out[14]:
activity subject
0 0.0 1.0
1 0.0 1.0
2 0.0 1.0
3 0.0 1.0
4 0.0 1.0

In [16]:
X_df.shape, y_df.shape


Out[16]:
((65628, 73), (65628, 2))

In [17]:
subject_train_indices = [1, 3, 5, 7, 9, 11]
subject_test_indices = [2, 4, 6, 8, 10, 12]

In [18]:
X_train_df = X_df[X_df["subject"].isin(subject_train_indices)].reset_index(drop=True)
y_train_df = y_df[y_df["subject"].isin(subject_train_indices)].reset_index(drop=True)
X_test_df = X_df[X_df["subject"].isin(subject_test_indices)].reset_index(drop=True)
y_test_df = y_df[y_df["subject"].isin(subject_test_indices)].reset_index(drop=True)

init_X_all = all_features_df.drop(labels="subject", axis=1).values
init_y_all = all_labels_df["activity"].values
X_train = X_train_df.drop(labels="subject", axis=1).values
y_train = y_train_df["activity"].values
X_test = X_test_df.drop(labels="subject", axis=1).values
y_test = y_test_df["activity"].values
X_all = np.concatenate([X_train, X_test])
y_all = np.concatenate([y_train, y_test])

In [19]:
init_X_all.shape, init_y_all.shape


Out[19]:
((77024, 72), (77024,))

In [20]:
X_train.shape, y_train.shape


Out[20]:
((32814, 72), (32814,))

In [21]:
X_test.shape, y_test.shape


Out[21]:
((32814, 72), (32814,))

In [22]:
X_all.shape, y_all.shape


Out[22]:
((65628, 72), (65628,))

In [23]:
init_all_dmatrix = xgb.DMatrix(init_X_all, init_y_all)
train_dmatrix = xgb.DMatrix(X_train, y_train)
test_dmatrix = xgb.DMatrix(X_test, y_test)
all_dmatrix = xgb.DMatrix(X_all, y_all)

Cross Validation


In [23]:
cv = []
for subject_id in subject_train_indices:
    train_indices = y_train_df[y_train_df["subject"] != subject_id].index.tolist()
    validation_indices = y_train_df[y_train_df["subject"] == subject_id].index.tolist()
    cv.append((train_indices, validation_indices))
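
Each fold holds out one training subject entirely, giving a leave-one-subject-out scheme over the six training subjects. A sanity check along these lines (added for illustration) would confirm that no fold mixes its held-out subject into the training split:

for train_indices, validation_indices in cv:
    train_subjects = set(y_train_df.loc[train_indices, "subject"])
    validation_subjects = set(y_train_df.loc[validation_indices, "subject"])
    assert len(validation_subjects) == 1
    assert train_subjects.isdisjoint(validation_subjects)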

In [24]:
def cv_tune_num_boost_round(X, y, cv, params, num_boost_round, early_stopping, verbose):
    
    start = time.time()
    
    cv_errors = list()
    
    for cv_idx, (train_indices, validation_indices) in enumerate(cv):
        print("| CV:", cv_idx)
        
        cv_X_train = np.take(X, train_indices, axis=0)
        cv_y_train = np.take(y, train_indices, axis=0)
        cv_train_dmatrix = xgb.DMatrix(cv_X_train, cv_y_train)

        cv_X_validation = np.take(X, validation_indices, axis=0)
        cv_y_validation = np.take(y, validation_indices, axis=0)
        cv_validation_dmatrix = xgb.DMatrix(cv_X_validation, cv_y_validation)

        watchlist = [(cv_train_dmatrix, "train"), (cv_validation_dmatrix, "eval")]
        cv_result = {}

        model = xgb.train(params=params, dtrain=cv_train_dmatrix, evals=watchlist, evals_result=cv_result, 
                          num_boost_round=num_boost_round, verbose_eval=verbose)
        
        cv_errors.append(cv_result)
        
        print("\n| Elapsed: {elapsed}\n".format(elapsed=timedelta(seconds=(time.time() - start))))

    # Aggregate the per-round validation error across folds (the median is used,
    # which is robust to the harder held-out subjects)
    validation_errors = [errors["eval"]["merror"] for errors in cv_errors]
    validation_avg_errors = [np.median(errors) for errors in zip(*validation_errors)]
    
    # Early stopping on the aggregated curve: stop at the first window of
    # `early_stopping` rounds whose error never drops below the window's starting
    # value, then report the best error and best round seen up to that point
    for i in range(early_stopping - 1, num_boost_round):
        validation_range_start = i - (early_stopping - 1)
        validation_range_errors = validation_avg_errors[validation_range_start:i+1]
        if validation_range_errors[0] <= np.min(validation_range_errors):
            return cv_errors, np.min(validation_avg_errors[0:i+1]), np.argmin(validation_avg_errors[0:i+1]) + 1

    return cv_errors, np.min(validation_avg_errors), np.argmin(validation_avg_errors) + 1

In [25]:
def cv_tune_tree_booster(X, y, params, cv_params, cv, n_jobs=-1, verbose=2):
    
    # GridSearchCV accepts an iterable of (train, validation) index pairs, so the
    # leave-one-subject-out folds built above are passed in directly. With no
    # explicit `scoring`, the classifier's default score (mean accuracy) is
    # maximized, which is why the grid-search results below report accuracies
    # rather than error rates.
    cv_model = model_selection.GridSearchCV(
        xgb.XGBClassifier(**params), param_grid=cv_params, cv=cv, n_jobs=n_jobs, verbose=verbose)
    
    cv_model.fit(X, y)
    
    return cv_model

1. Fix parameters and find the number of estimators (boosting rounds)


In [26]:
params = {
    "learning_rate": 0.05,
    # n_estimators is read by the sklearn wrapper during grid search;
    # xgb.train takes its round count separately via num_boost_round
    "n_estimators": 100,
    "max_depth": 5,
    "min_child_weight": 1,
    "max_delta_step": 1,
    "gamma": 0.5,
    "subsample": 0.5,
    "colsample_bytree": 0.5,
    "colsample_bylevel": 0.5,
    "reg_lambda": 1,
    "reg_alpha": 0,
    "scale_pos_weight": 1,
    "objective": "multi:softmax",
    "eval_metric": "merror",
    "num_class": 6,
    "silent": 0,
    "seed": 42
}

In [27]:
print("----------------------------------")
params


----------------------------------
Out[27]:
{'colsample_bylevel': 0.5,
 'colsample_bytree': 0.5,
 'eval_metric': 'merror',
 'gamma': 0.5,
 'learning_rate': 0.05,
 'max_delta_step': 1,
 'max_depth': 5,
 'min_child_weight': 1,
 'n_estimators': 100,
 'num_class': 6,
 'objective': 'multi:softmax',
 'reg_alpha': 0,
 'reg_lambda': 1,
 'scale_pos_weight': 1,
 'seed': 42,
 'silent': 0,
 'subsample': 0.5}

In [28]:
cv_errors, min_error, n_estimators = cv_tune_num_boost_round(
    X=X_train, y=y_train, cv=cv, params=params, num_boost_round=500, early_stopping=50, verbose=50)


| CV: 0
[0]	train-merror:0.101481	eval-merror:0.250137
[50]	train-merror:0.016712	eval-merror:0.087585
[100]	train-merror:0.009691	eval-merror:0.066374
[150]	train-merror:0.004717	eval-merror:0.048089
[200]	train-merror:0.002158	eval-merror:0.044432
[250]	train-merror:0.001097	eval-merror:0.041507
[300]	train-merror:0.000585	eval-merror:0.041324
[350]	train-merror:0.000512	eval-merror:0.039312
[400]	train-merror:0.000293	eval-merror:0.039861
[450]	train-merror:0.000183	eval-merror:0.039495

| Elapsed: 0:01:23.848902

| CV: 1
[0]	train-merror:0.070872	eval-merror:0.262571
[50]	train-merror:0.018541	eval-merror:0.042969
[100]	train-merror:0.010532	eval-merror:0.036021
[150]	train-merror:0.004717	eval-merror:0.035107
[200]	train-merror:0.002194	eval-merror:0.035656
[250]	train-merror:0.00117	eval-merror:0.034376
[300]	train-merror:0.000695	eval-merror:0.035107
[350]	train-merror:0.000366	eval-merror:0.033827
[400]	train-merror:0.000256	eval-merror:0.032913
[450]	train-merror:0.000219	eval-merror:0.032913

| Elapsed: 0:03:00.790664

| CV: 2
[0]	train-merror:0.084805	eval-merror:0.327299
[50]	train-merror:0.017444	eval-merror:0.102944
[100]	train-merror:0.010422	eval-merror:0.089047
[150]	train-merror:0.004827	eval-merror:0.075517
[200]	train-merror:0.002377	eval-merror:0.071128
[250]	train-merror:0.001317	eval-merror:0.0693
[300]	train-merror:0.000731	eval-merror:0.06674
[350]	train-merror:0.000439	eval-merror:0.063997
[400]	train-merror:0.000183	eval-merror:0.064911
[450]	train-merror:0.000146	eval-merror:0.065277

| Elapsed: 0:04:30.035039

| CV: 3
[0]	train-merror:0.06908	eval-merror:0.446517
[50]	train-merror:0.015725	eval-merror:0.256537
[100]	train-merror:0.009508	eval-merror:0.265496
[150]	train-merror:0.004608	eval-merror:0.303346
[200]	train-merror:0.002304	eval-merror:0.329859
[250]	train-merror:0.001243	eval-merror:0.346133
[300]	train-merror:0.000841	eval-merror:0.350338
[350]	train-merror:0.000512	eval-merror:0.351801
[400]	train-merror:0.000219	eval-merror:0.349973
[450]	train-merror:0.000146	eval-merror:0.356007

| Elapsed: 0:05:59.561071

| CV: 4
[0]	train-merror:0.10203	eval-merror:0.3145
[50]	train-merror:0.018248	eval-merror:0.044432
[100]	train-merror:0.010093	eval-merror:0.039312
[150]	train-merror:0.004608	eval-merror:0.038033
[200]	train-merror:0.002377	eval-merror:0.035656
[250]	train-merror:0.001317	eval-merror:0.03529
[300]	train-merror:0.000622	eval-merror:0.035838
[350]	train-merror:0.000439	eval-merror:0.034558
[400]	train-merror:0.000256	eval-merror:0.034376
[450]	train-merror:0.000146	eval-merror:0.034376

| Elapsed: 0:07:36.927472

| CV: 5
[0]	train-merror:0.083525	eval-merror:0.4968
[50]	train-merror:0.015798	eval-merror:0.192723
[100]	train-merror:0.008265	eval-merror:0.237338
[150]	train-merror:0.004169	eval-merror:0.228927
[200]	train-merror:0.001865	eval-merror:0.232767
[250]	train-merror:0.001061	eval-merror:0.232401
[300]	train-merror:0.000768	eval-merror:0.232218
[350]	train-merror:0.000366	eval-merror:0.233681
[400]	train-merror:0.000256	eval-merror:0.232218
[450]	train-merror:0.000183	eval-merror:0.232035

| Elapsed: 0:09:18.132102


In [29]:
print("----------------------------------")
print("n_estimators =", n_estimators)
print("min_error =", min_error)


----------------------------------
n_estimators = 383
min_error = 0.051106

In [30]:
params = {
    "learning_rate": 0.05,
    "n_estimators": n_estimators,
    "max_depth": 5,
    "min_child_weight": 1,
    "max_delta_step": 1,
    "gamma": 0.5,
    "subsample": 0.5,
    "colsample_bytree": 0.5,
    "colsample_bylevel": 0.5,
    "reg_lambda": 1,
    "reg_alpha": 0,
    "scale_pos_weight": 1,
    "objective": "multi:softmax",
    "eval_metric": "merror",
    "num_class": 6,
    "silent": 0,
    "seed": 42
}

In [31]:
print("----------------------------------")
params


----------------------------------
Out[31]:
{'colsample_bylevel': 0.5,
 'colsample_bytree': 0.5,
 'eval_metric': 'merror',
 'gamma': 0.5,
 'learning_rate': 0.05,
 'max_delta_step': 1,
 'max_depth': 5,
 'min_child_weight': 1,
 'n_estimators': 383,
 'num_class': 6,
 'objective': 'multi:softmax',
 'reg_alpha': 0,
 'reg_lambda': 1,
 'scale_pos_weight': 1,
 'seed': 42,
 'silent': 0,
 'subsample': 0.5}

1b. Testing error


In [32]:
booster = xgb.train(params=params, dtrain=train_dmatrix, num_boost_round=params["n_estimators"])

In [33]:
y_predicted = booster.predict(test_dmatrix)

In [34]:
accuracy = metrics.accuracy_score(y_test, y_predicted)
cm = metrics.confusion_matrix(y_test, y_predicted)
cm = cm.astype("float") / cm.sum(axis=1)[:, np.newaxis]  # row-normalize (per-class rates)
cm *= 100
print(accuracy)
print(cm)


0.902358749314
[[ 97.19   0.07   0.     0.75   1.62   0.37]
 [  0.74  98.45   0.39   0.     0.42   0.01]
 [  0.65   6.46  92.19   0.     0.55   0.15]
 [ 25.74   0.     0.    67.02   5.79   1.45]
 [  1.26   0.07   0.     5.27  90.95   2.45]
 [  7.57   0.13   0.     0.71   0.41  91.19]]
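
The rows and columns of the confusion matrix follow the activity IDs 0-5 (class names are not defined in this notebook). If a labeled view helps, the normalized matrix can be wrapped in a DataFrame:

pd.DataFrame(cm, index=activities_list, columns=activities_list).round(2)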

2. Tune max_depth and min_child_weight


In [35]:
params = {
    "learning_rate": 0.05,
    "n_estimators": n_estimators,
    "max_depth": 5,
    "min_child_weight": 1,
    "max_delta_step": 1,
    "gamma": 0.5,
    "subsample": 0.5,
    "colsample_bytree": 0.5,
    "colsample_bylevel": 0.5,
    "reg_lambda": 1,
    "reg_alpha": 0,
    "scale_pos_weight": 1,
    "objective": "multi:softmax",
    "silent": 0,
    "seed": 42
}

cv_params = {
    "max_depth": list(range(3,10,2)), 
    "min_child_weight": list(range(1,10,2))
}

In [36]:
print("----------------------------------")
params
cv_params


----------------------------------
Out[36]:
{'max_depth': [3, 5, 7, 9], 'min_child_weight': [1, 3, 5, 7, 9]}

In [37]:
cv_model = cv_tune_tree_booster(
    X=X_train, y=y_train, params=params, cv_params=cv_params, cv=cv)

max_depth, min_child_weight = cv_model.best_params_["max_depth"], cv_model.best_params_["min_child_weight"]


Fitting 6 folds for each of 20 candidates, totalling 120 fits
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed: 22.7min
[Parallel(n_jobs=-1)]: Done 120 out of 120 | elapsed: 102.4min finished

In [38]:
print("----------------------------------")
cv_results = cv_model.cv_results_
for param_idx, param in enumerate(cv_results["params"]):
    print("{param} - mean: {mean:.6f}, std: {std:.6f}".format(
        param=param, mean=cv_results["mean_test_score"][param_idx], std=cv_results["std_test_score"][param_idx]))

print("----------------------------------")
print("max_depth =", max_depth)
print("min_child_weight =", min_child_weight)
print("best score =", cv_model.best_score_)


----------------------------------
{'max_depth': 3, 'min_child_weight': 1} - mean: 0.869324, std: 0.127181
{'max_depth': 3, 'min_child_weight': 3} - mean: 0.868227, std: 0.129352
{'max_depth': 3, 'min_child_weight': 5} - mean: 0.867099, std: 0.130871
{'max_depth': 3, 'min_child_weight': 7} - mean: 0.870330, std: 0.127460
{'max_depth': 3, 'min_child_weight': 9} - mean: 0.870086, std: 0.126819
{'max_depth': 5, 'min_child_weight': 1} - mean: 0.874566, std: 0.122147
{'max_depth': 5, 'min_child_weight': 3} - mean: 0.875510, std: 0.118223
{'max_depth': 5, 'min_child_weight': 5} - mean: 0.873408, std: 0.122494
{'max_depth': 5, 'min_child_weight': 7} - mean: 0.877400, std: 0.114564
{'max_depth': 5, 'min_child_weight': 9} - mean: 0.873804, std: 0.120069
{'max_depth': 7, 'min_child_weight': 1} - mean: 0.873621, std: 0.120794
{'max_depth': 7, 'min_child_weight': 3} - mean: 0.875632, std: 0.118581
{'max_depth': 7, 'min_child_weight': 5} - mean: 0.874535, std: 0.118904
{'max_depth': 7, 'min_child_weight': 7} - mean: 0.869080, std: 0.124743
{'max_depth': 7, 'min_child_weight': 9} - mean: 0.870360, std: 0.123165
{'max_depth': 9, 'min_child_weight': 1} - mean: 0.878223, std: 0.116094
{'max_depth': 9, 'min_child_weight': 3} - mean: 0.869659, std: 0.126523
{'max_depth': 9, 'min_child_weight': 5} - mean: 0.871092, std: 0.126816
{'max_depth': 9, 'min_child_weight': 7} - mean: 0.875023, std: 0.118956
{'max_depth': 9, 'min_child_weight': 9} - mean: 0.878345, std: 0.113459
----------------------------------
max_depth = 9
min_child_weight = 9
best score = 0.878344609008

2b. Fine-tune max_depth and min_child_weight


In [39]:
params = {
    "learning_rate": 0.05,
    "n_estimators": n_estimators,
    "max_depth": 5,
    "min_child_weight": 1,
    "max_delta_step": 1,
    "gamma": 0.5,
    "subsample": 0.5,
    "colsample_bytree": 0.5,
    "colsample_bylevel": 0.5,
    "reg_lambda": 1,
    "reg_alpha": 0,
    "scale_pos_weight": 1,
    "objective": "multi:softmax",
    "silent": 0,
    "seed": 42
}

cv_params = {
    "max_depth": [max_depth-1, max_depth, max_depth+1], 
    "min_child_weight": [min_child_weight-1, min_child_weight, min_child_weight+1]
}

In [40]:
print("----------------------------------")
params
cv_params


----------------------------------
Out[40]:
{'max_depth': [8, 9, 10], 'min_child_weight': [8, 9, 10]}

In [41]:
cv_model = cv_tune_tree_booster(
    X=X_train, y=y_train, params=params, cv_params=cv_params, cv=cv)

max_depth, min_child_weight = cv_model.best_params_["max_depth"], cv_model.best_params_["min_child_weight"]


Fitting 6 folds for each of 9 candidates, totalling 54 fits
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed: 26.4min
[Parallel(n_jobs=-1)]: Done  54 out of  54 | elapsed: 47.5min finished

In [42]:
print("----------------------------------")
cv_results = cv_model.cv_results_
for param_idx, param in enumerate(cv_results["params"]):
    print("{param} - mean: {mean:.6f}, std: {std:.6f}".format(
        param=param, mean=cv_results["mean_test_score"][param_idx], std=cv_results["std_test_score"][param_idx]))

print("----------------------------------")
print("max_depth =", max_depth)
print("min_child_weight =", min_child_weight)
print("best score =", cv_model.best_score_)


----------------------------------
{'max_depth': 8, 'min_child_weight': 8} - mean: 0.871579, std: 0.125113
{'max_depth': 8, 'min_child_weight': 9} - mean: 0.875663, std: 0.118095
{'max_depth': 8, 'min_child_weight': 10} - mean: 0.867587, std: 0.124941
{'max_depth': 9, 'min_child_weight': 8} - mean: 0.873072, std: 0.123376
{'max_depth': 9, 'min_child_weight': 9} - mean: 0.878345, std: 0.113459
{'max_depth': 9, 'min_child_weight': 10} - mean: 0.864844, std: 0.132270
{'max_depth': 10, 'min_child_weight': 8} - mean: 0.873377, std: 0.122244
{'max_depth': 10, 'min_child_weight': 9} - mean: 0.870756, std: 0.127472
{'max_depth': 10, 'min_child_weight': 10} - mean: 0.871793, std: 0.119864
----------------------------------
max_depth = 9
min_child_weight = 9
best score = 0.878344609008

2c. Calibrate the number of boosting rounds


In [43]:
params = {
    "learning_rate": 0.05,
    "n_estimators": n_estimators,
    "max_depth": max_depth,
    "min_child_weight": min_child_weight,
    "max_delta_step": 1,
    "gamma": 0.5,
    "subsample": 0.5,
    "colsample_bytree": 0.5,
    "colsample_bylevel": 0.5,
    "reg_lambda": 1,
    "reg_alpha": 0,
    "scale_pos_weight": 1,
    "objective": "multi:softmax",
    "eval_metric": "merror",
    "num_class": 6,
    "silent": 0,
    "seed": 42
}

In [44]:
print("----------------------------------")
params


----------------------------------
Out[44]:
{'colsample_bylevel': 0.5,
 'colsample_bytree': 0.5,
 'eval_metric': 'merror',
 'gamma': 0.5,
 'learning_rate': 0.05,
 'max_delta_step': 1,
 'max_depth': 9,
 'min_child_weight': 9,
 'n_estimators': 383,
 'num_class': 6,
 'objective': 'multi:softmax',
 'reg_alpha': 0,
 'reg_lambda': 1,
 'scale_pos_weight': 1,
 'seed': 42,
 'silent': 0,
 'subsample': 0.5}

In [45]:
cv_errors, min_error, n_estimators = cv_tune_num_boost_round(
    X=X_train, y=y_train, cv=cv, params=params, num_boost_round=500, early_stopping=50, verbose=50)


| CV: 0
[0]	train-merror:0.053977	eval-merror:0.147925
[50]	train-merror:0.01247	eval-merror:0.083379
[100]	train-merror:0.006948	eval-merror:0.05394
[150]	train-merror:0.003803	eval-merror:0.048638
[200]	train-merror:0.002523	eval-merror:0.045712
[250]	train-merror:0.002011	eval-merror:0.043884
[300]	train-merror:0.001572	eval-merror:0.039678
[350]	train-merror:0.001243	eval-merror:0.039678
[400]	train-merror:0.001097	eval-merror:0.038947
[450]	train-merror:0.001024	eval-merror:0.038033

| Elapsed: 0:01:27.582212

| CV: 1
[0]	train-merror:0.048382	eval-merror:0.130737
[50]	train-merror:0.012982	eval-merror:0.04041
[100]	train-merror:0.006948	eval-merror:0.033278
[150]	train-merror:0.00395	eval-merror:0.033096
[200]	train-merror:0.002926	eval-merror:0.034376
[250]	train-merror:0.002084	eval-merror:0.03273
[300]	train-merror:0.001755	eval-merror:0.032364
[350]	train-merror:0.001353	eval-merror:0.03145
[400]	train-merror:0.001207	eval-merror:0.032364
[450]	train-merror:0.000951	eval-merror:0.032181

| Elapsed: 0:02:56.093595

| CV: 2
[0]	train-merror:0.058036	eval-merror:0.116475
[50]	train-merror:0.012214	eval-merror:0.092704
[100]	train-merror:0.0064	eval-merror:0.082648
[150]	train-merror:0.003986	eval-merror:0.073871
[200]	train-merror:0.002999	eval-merror:0.070031
[250]	train-merror:0.002414	eval-merror:0.068751
[300]	train-merror:0.001938	eval-merror:0.066008
[350]	train-merror:0.001463	eval-merror:0.066557
[400]	train-merror:0.001243	eval-merror:0.067288
[450]	train-merror:0.001061	eval-merror:0.067106

| Elapsed: 0:04:23.844483

| CV: 3
[0]	train-merror:0.070506	eval-merror:0.457671
[50]	train-merror:0.012105	eval-merror:0.322545
[100]	train-merror:0.006985	eval-merror:0.346681
[150]	train-merror:0.004388	eval-merror:0.348693
[200]	train-merror:0.002926	eval-merror:0.332053
[250]	train-merror:0.002084	eval-merror:0.308832
[300]	train-merror:0.001865	eval-merror:0.313586
[350]	train-merror:0.001536	eval-merror:0.315048
[400]	train-merror:0.001243	eval-merror:0.31066
[450]	train-merror:0.001024	eval-merror:0.294752

| Elapsed: 0:05:50.861040

| CV: 4
[0]	train-merror:0.052039	eval-merror:0.099287
[50]	train-merror:0.012617	eval-merror:0.044615
[100]	train-merror:0.006619	eval-merror:0.039678
[150]	train-merror:0.00384	eval-merror:0.037667
[200]	train-merror:0.002633	eval-merror:0.035107
[250]	train-merror:0.002048	eval-merror:0.034741
[300]	train-merror:0.001682	eval-merror:0.035107
[350]	train-merror:0.001353	eval-merror:0.034558
[400]	train-merror:0.001134	eval-merror:0.035107
[450]	train-merror:0.000914	eval-merror:0.036204

| Elapsed: 0:07:19.021480

| CV: 5
[0]	train-merror:0.054526	eval-merror:0.530993
[50]	train-merror:0.011885	eval-merror:0.232949
[100]	train-merror:0.006546	eval-merror:0.240629
[150]	train-merror:0.00362	eval-merror:0.236241
[200]	train-merror:0.002377	eval-merror:0.235692
[250]	train-merror:0.001938	eval-merror:0.234412
[300]	train-merror:0.001609	eval-merror:0.240812
[350]	train-merror:0.00128	eval-merror:0.242092
[400]	train-merror:0.001097	eval-merror:0.246297
[450]	train-merror:0.000987	eval-merror:0.244286

| Elapsed: 0:08:45.393540


In [46]:
print("----------------------------------")
print("n_estimators =", n_estimators)
print("min_error =", min_error)


----------------------------------
n_estimators = 321
min_error = 0.052569

2d. Testing error


In [47]:
params = {
    "learning_rate": 0.05,
    "n_estimators": n_estimators,
    "max_depth": max_depth,
    "min_child_weight": min_child_weight,
    "max_delta_step": 1,
    "gamma": 0.5,
    "subsample": 0.5,
    "colsample_bytree": 0.5,
    "colsample_bylevel": 0.5,
    "reg_lambda": 1,
    "reg_alpha": 0,
    "scale_pos_weight": 1,
    "objective": "multi:softmax",
    "eval_metric": "merror",
    "num_class": 6,
    "silent": 0,
    "seed": 42
}

In [48]:
print("----------------------------------")
params


----------------------------------
Out[48]:
{'colsample_bylevel': 0.5,
 'colsample_bytree': 0.5,
 'eval_metric': 'merror',
 'gamma': 0.5,
 'learning_rate': 0.05,
 'max_delta_step': 1,
 'max_depth': 9,
 'min_child_weight': 9,
 'n_estimators': 321,
 'num_class': 6,
 'objective': 'multi:softmax',
 'reg_alpha': 0,
 'reg_lambda': 1,
 'scale_pos_weight': 1,
 'seed': 42,
 'silent': 0,
 'subsample': 0.5}

In [49]:
booster = xgb.train(params=params, dtrain=train_dmatrix, num_boost_round=params["n_estimators"])

In [50]:
y_predicted = booster.predict(test_dmatrix)

In [51]:
accuracy = metrics.accuracy_score(y_test, y_predicted)
cm = metrics.confusion_matrix(y_test, y_predicted)
cm = cm.astype("float") / cm.sum(axis=1)[:, np.newaxis]
cm *= 100
print(accuracy)
print(cm)


0.904796733102
[[ 97.48   0.04   0.     0.68   1.57   0.24]
 [  0.74  98.66   0.32   0.     0.29   0.  ]
 [  1.05   6.55  92.07   0.     0.15   0.18]
 [ 26.17   0.     0.    67.14   4.83   1.87]
 [  1.26   0.11   0.     4.6   91.25   2.78]
 [  7.06   0.24   0.     0.66   0.28  91.76]]

3. Tune gamma


In [52]:
params = {
    "learning_rate": 0.05,
    "n_estimators": n_estimators,
    "max_depth": max_depth,
    "min_child_weight": min_child_weight,
    "max_delta_step": 1,
    "gamma": 0.5,
    "subsample": 0.5,
    "colsample_bytree": 0.5,
    "colsample_bylevel": 0.5,
    "reg_lambda": 1,
    "reg_alpha": 0,
    "scale_pos_weight": 1,
    "objective": "multi:softmax",
    "silent": 0,
    "seed": 42
}

cv_params = {
    "gamma": [i/10.0 for i in range(0, 11)]
}

In [53]:
print("----------------------------------")
params
cv_params


----------------------------------
Out[53]:
{'gamma': [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]}

In [54]:
cv_model = cv_tune_tree_booster(
    X=X_train, y=y_train, params=params, cv_params=cv_params, cv=cv)

gamma = cv_model.best_params_["gamma"]


Fitting 6 folds for each of 11 candidates, totalling 66 fits
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed: 22.7min
[Parallel(n_jobs=-1)]: Done  66 out of  66 | elapsed: 49.7min finished

In [55]:
print("----------------------------------")
cv_results = cv_model.cv_results_
for param_idx, param in enumerate(cv_results["params"]):
    print("{param} - mean: {mean:.6f}, std: {std:.6f}".format(
        param=param, mean=cv_results["mean_test_score"][param_idx], std=cv_results["std_test_score"][param_idx]))

print("----------------------------------")
print("gamma =", gamma)


----------------------------------
{'gamma': 0.0} - mean: 0.873164, std: 0.121493
{'gamma': 0.1} - mean: 0.869415, std: 0.128694
{'gamma': 0.2} - mean: 0.872981, std: 0.122192
{'gamma': 0.3} - mean: 0.872463, std: 0.121026
{'gamma': 0.4} - mean: 0.873286, std: 0.121653
{'gamma': 0.5} - mean: 0.878192, std: 0.114036
{'gamma': 0.6} - mean: 0.878497, std: 0.112706
{'gamma': 0.7} - mean: 0.874992, std: 0.117516
{'gamma': 0.8} - mean: 0.875175, std: 0.119884
{'gamma': 0.9} - mean: 0.876120, std: 0.119733
{'gamma': 1.0} - mean: 0.875937, std: 0.119502
----------------------------------
gamma = 0.6

3b. Calibrate the number of boosting rounds


In [56]:
params = {
    "learning_rate": 0.05,
    "n_estimators": n_estimators,
    "max_depth": max_depth,
    "min_child_weight": min_child_weight,
    "max_delta_step": 1,
    "gamma": gamma,
    "subsample": 0.5,
    "colsample_bytree": 0.5,
    "colsample_bylevel": 0.5,
    "reg_lambda": 1,
    "reg_alpha": 0,
    "scale_pos_weight": 1,
    "objective": "multi:softmax",
    "eval_metric": "merror",
    "num_class": 6,
    "silent": 0,
    "seed": 42
}

In [59]:
print("----------------------------------")
params


----------------------------------
Out[59]:
{'colsample_bylevel': 0.5,
 'colsample_bytree': 0.5,
 'eval_metric': 'merror',
 'gamma': 0.6,
 'learning_rate': 0.05,
 'max_delta_step': 1,
 'max_depth': 9,
 'min_child_weight': 9,
 'n_estimators': 321,
 'num_class': 6,
 'objective': 'multi:softmax',
 'reg_alpha': 0,
 'reg_lambda': 1,
 'scale_pos_weight': 1,
 'seed': 42,
 'silent': 0,
 'subsample': 0.5}

In [61]:
cv_errors, min_error, n_estimators = cv_tune_num_boost_round(
    X=X_train, y=y_train, cv=cv, params=params, num_boost_round=500, early_stopping=50, verbose=50)


| CV: 0
[0]	train-merror:0.053977	eval-merror:0.147925
[50]	train-merror:0.012946	eval-merror:0.066923
[100]	train-merror:0.0064	eval-merror:0.056866
[150]	train-merror:0.003876	eval-merror:0.049186
[200]	train-merror:0.002596	eval-merror:0.046809
[250]	train-merror:0.001828	eval-merror:0.043335
[300]	train-merror:0.001646	eval-merror:0.042969
[350]	train-merror:0.001317	eval-merror:0.041141
[400]	train-merror:0.001134	eval-merror:0.041141
[450]	train-merror:0.001097	eval-merror:0.039861

| Elapsed: 0:01:25.977466

| CV: 1
[0]	train-merror:0.048345	eval-merror:0.130554
[50]	train-merror:0.012909	eval-merror:0.038033
[100]	train-merror:0.007131	eval-merror:0.034558
[150]	train-merror:0.003913	eval-merror:0.03273
[200]	train-merror:0.003072	eval-merror:0.033278
[250]	train-merror:0.002121	eval-merror:0.03401
[300]	train-merror:0.001609	eval-merror:0.033096
[350]	train-merror:0.001317	eval-merror:0.033278
[400]	train-merror:0.00128	eval-merror:0.032913
[450]	train-merror:0.001207	eval-merror:0.032913

| Elapsed: 0:02:55.615314

| CV: 2
[0]	train-merror:0.058146	eval-merror:0.116475
[50]	train-merror:0.012397	eval-merror:0.095447
[100]	train-merror:0.006948	eval-merror:0.079722
[150]	train-merror:0.003986	eval-merror:0.069848
[200]	train-merror:0.002999	eval-merror:0.068568
[250]	train-merror:0.002414	eval-merror:0.063997
[300]	train-merror:0.001755	eval-merror:0.063631
[350]	train-merror:0.001646	eval-merror:0.0629
[400]	train-merror:0.001317	eval-merror:0.061437
[450]	train-merror:0.00117	eval-merror:0.060889

| Elapsed: 0:04:24.937390

| CV: 3
[0]	train-merror:0.070324	eval-merror:0.457122
[50]	train-merror:0.012068	eval-merror:0.329311
[100]	train-merror:0.007131	eval-merror:0.353081
[150]	train-merror:0.004681	eval-merror:0.350704
[200]	train-merror:0.003108	eval-merror:0.344487
[250]	train-merror:0.002523	eval-merror:0.313037
[300]	train-merror:0.001938	eval-merror:0.329676
[350]	train-merror:0.001499	eval-merror:0.315231
[400]	train-merror:0.001317	eval-merror:0.309014
[450]	train-merror:0.001134	eval-merror:0.309014

| Elapsed: 0:05:58.118424

| CV: 4
[0]	train-merror:0.052039	eval-merror:0.099287
[50]	train-merror:0.012324	eval-merror:0.049735
[100]	train-merror:0.00629	eval-merror:0.046078
[150]	train-merror:0.00395	eval-merror:0.042604
[200]	train-merror:0.002999	eval-merror:0.038947
[250]	train-merror:0.002121	eval-merror:0.03529
[300]	train-merror:0.001682	eval-merror:0.036935
[350]	train-merror:0.001426	eval-merror:0.036021
[400]	train-merror:0.001134	eval-merror:0.03785
[450]	train-merror:0.000951	eval-merror:0.036204

| Elapsed: 0:07:27.102637

| CV: 5
[0]	train-merror:0.054343	eval-merror:0.53081
[50]	train-merror:0.011958	eval-merror:0.230938
[100]	train-merror:0.006802	eval-merror:0.234595
[150]	train-merror:0.003913	eval-merror:0.233681
[200]	train-merror:0.002487	eval-merror:0.228012
[250]	train-merror:0.001865	eval-merror:0.230572
[300]	train-merror:0.001646	eval-merror:0.233864
[350]	train-merror:0.00128	eval-merror:0.236423
[400]	train-merror:0.00117	eval-merror:0.238435
[450]	train-merror:0.001024	eval-merror:0.239715

| Elapsed: 0:09:00.848468


In [62]:
print("----------------------------------")
print("n_estimators =", n_estimators)
print("min_error =", min_error)


----------------------------------
n_estimators = 498
min_error = 0.049918

3c. Testing error


In [63]:
params = {
    "learning_rate": 0.05,
    "n_estimators": n_estimators,
    "max_depth": max_depth,
    "min_child_weight": min_child_weight,
    "max_delta_step": 1,
    "gamma": gamma,
    "subsample": 0.5,
    "colsample_bytree": 0.5,
    "colsample_bylevel": 0.5,
    "reg_lambda": 1,
    "reg_alpha": 0,
    "scale_pos_weight": 1,
    "objective": "multi:softmax",
    "eval_metric": "merror",
    "num_class": 6,
    "silent": 0,
    "seed": 42
}

In [65]:
booster = xgb.train(params=params, dtrain=train_dmatrix, num_boost_round=params["n_estimators"])

In [70]:
y_predicted = booster.predict(test_dmatrix)

In [71]:
accuracy = metrics.accuracy_score(y_test, y_predicted)
cm = metrics.confusion_matrix(y_test, y_predicted)
cm = cm.astype("float") / cm.sum(axis=1)[:, np.newaxis]
cm *= 100
print(accuracy)
print(cm)


0.904979581886
[[ 97.33   0.11   0.     0.58   1.62   0.35]
 [  0.74  98.55   0.46   0.     0.25   0.01]
 [  0.8    6.67  92.22   0.03   0.12   0.15]
 [ 26.21   0.     0.    67.5    4.66   1.64]
 [  1.17   0.09   0.     5.56  90.44   2.74]
 [  5.97   0.15   0.09   0.79   0.3   92.7 ]]

4. Tune subsample


In [72]:
params = {
    "learning_rate": 0.05,
    "n_estimators": n_estimators,
    "max_depth": max_depth,
    "min_child_weight": min_child_weight,
    "max_delta_step": 1,
    "gamma": gamma,
    "subsample": 0.5,
    "colsample_bytree": 0.5,
    "colsample_bylevel": 0.5,
    "reg_lambda": 1,
    "reg_alpha": 0,
    "scale_pos_weight": 1,
    "objective": "multi:softmax",
    "silent": 0,
    "seed": 42
}

cv_params = {
    "subsample": [i/10.0 for i in range(5,11)]
}

In [73]:
print("----------------------------------")
params
cv_params


----------------------------------
Out[73]:
{'subsample': [0.5, 0.6, 0.7, 0.8, 0.9, 1.0]}

In [74]:
cv_model = cv_tune_tree_booster(
    X=X_train, y=y_train, params=params, cv_params=cv_params, cv=cv)

subsample = cv_model.best_params_["subsample"]


Fitting 6 folds for each of 6 candidates, totalling 36 fits
[Parallel(n_jobs=-1)]: Done  36 out of  36 | elapsed: 44.2min finished

In [75]:
print("----------------------------------")
cv_results = cv_model.cv_results_
for param_idx, param in enumerate(cv_results["params"]):
    print("{param} - mean: {mean:.6f}, std: {std:.6f}".format(
        param=param, mean=cv_results["mean_test_score"][param_idx], std=cv_results["std_test_score"][param_idx]))

print("----------------------------------")
print("subsample =", subsample)


----------------------------------
{'subsample': 0.5} - mean: 0.880051, std: 0.111949
{'subsample': 0.6} - mean: 0.871153, std: 0.123752
{'subsample': 0.7} - mean: 0.871335, std: 0.121681
{'subsample': 0.8} - mean: 0.874718, std: 0.119554
{'subsample': 0.9} - mean: 0.874139, std: 0.120091
{'subsample': 1.0} - mean: 0.876973, std: 0.113224
----------------------------------
subsample = 0.5

4b. Calibrate the number of boosting rounds


In [60]:
params = {
    "learning_rate": 0.05,
    "n_estimators": n_estimators,
    "max_depth": max_depth,
    "min_child_weight": min_child_weight,
    "max_delta_step": 1,
    "gamma": gamma,
    "subsample": subsample,
    "colsample_bytree": 0.5,
    "colsample_bylevel": 0.5,
    "reg_lambda": 1,
    "reg_alpha": 0,
    "scale_pos_weight": 1,
    "objective": "multi:softmax",
    "eval_metric": "merror",
    "num_class": 6,
    "silent": 0,
    "seed": 42
}

In [61]:
print("----------------------------------")
params


----------------------------------
Out[61]:
{'colsample_bylevel': 0.5,
 'colsample_bytree': 0.5,
 'eval_metric': 'merror',
 'gamma': 0.6,
 'learning_rate': 0.05,
 'max_delta_step': 1,
 'max_depth': 9,
 'min_child_weight': 9,
 'n_estimators': 30,
 'num_class': 6,
 'objective': 'multi:softmax',
 'reg_alpha': 0,
 'reg_lambda': 1,
 'scale_pos_weight': 1,
 'seed': 42,
 'silent': 0,
 'subsample': 0.5}

In [62]:
cv_errors, min_error, n_estimators = cv_tune_num_boost_round(
    X=X_train, y=y_train, cv=cv, params=params, num_boost_round=500, early_stopping=50, verbose=50)


| CV: 0
[0]	train-merror:0.053977	eval-merror:0.147925
[50]	train-merror:0.012946	eval-merror:0.066923
[100]	train-merror:0.0064	eval-merror:0.056866
[150]	train-merror:0.003876	eval-merror:0.049186
[200]	train-merror:0.002596	eval-merror:0.046809
[250]	train-merror:0.001828	eval-merror:0.043335
[300]	train-merror:0.001646	eval-merror:0.042969
[350]	train-merror:0.001317	eval-merror:0.041141
[400]	train-merror:0.001134	eval-merror:0.041141
[450]	train-merror:0.001097	eval-merror:0.039861

| Elapsed: 0:01:30.296482

| CV: 1
[0]	train-merror:0.048345	eval-merror:0.130554
[50]	train-merror:0.012909	eval-merror:0.038033
[100]	train-merror:0.007131	eval-merror:0.034558
[150]	train-merror:0.003913	eval-merror:0.03273
[200]	train-merror:0.003072	eval-merror:0.033278
[250]	train-merror:0.002121	eval-merror:0.03401
[300]	train-merror:0.001609	eval-merror:0.033096
[350]	train-merror:0.001317	eval-merror:0.033278
[400]	train-merror:0.00128	eval-merror:0.032913
[450]	train-merror:0.001207	eval-merror:0.032913

| Elapsed: 0:03:03.702524

| CV: 2
[0]	train-merror:0.058146	eval-merror:0.116475
[50]	train-merror:0.012397	eval-merror:0.095447
[100]	train-merror:0.006948	eval-merror:0.079722
[150]	train-merror:0.003986	eval-merror:0.069848
[200]	train-merror:0.002999	eval-merror:0.068568
[250]	train-merror:0.002414	eval-merror:0.063997
[300]	train-merror:0.001755	eval-merror:0.063631
[350]	train-merror:0.001646	eval-merror:0.0629
[400]	train-merror:0.001317	eval-merror:0.061437
[450]	train-merror:0.00117	eval-merror:0.060889

| Elapsed: 0:04:38.610067

| CV: 3
[0]	train-merror:0.070324	eval-merror:0.457122
[50]	train-merror:0.012068	eval-merror:0.329311
[100]	train-merror:0.007131	eval-merror:0.353081
[150]	train-merror:0.004681	eval-merror:0.350704
[200]	train-merror:0.003108	eval-merror:0.344487
[250]	train-merror:0.002523	eval-merror:0.313037
[300]	train-merror:0.001938	eval-merror:0.329676
[350]	train-merror:0.001499	eval-merror:0.315231
[400]	train-merror:0.001317	eval-merror:0.309014
[450]	train-merror:0.001134	eval-merror:0.309014

| Elapsed: 0:06:12.040626

| CV: 4
[0]	train-merror:0.052039	eval-merror:0.099287
[50]	train-merror:0.012324	eval-merror:0.049735
[100]	train-merror:0.00629	eval-merror:0.046078
[150]	train-merror:0.00395	eval-merror:0.042604
[200]	train-merror:0.002999	eval-merror:0.038947
[250]	train-merror:0.002121	eval-merror:0.03529
[300]	train-merror:0.001682	eval-merror:0.036935
[350]	train-merror:0.001426	eval-merror:0.036021
[400]	train-merror:0.001134	eval-merror:0.03785
[450]	train-merror:0.000951	eval-merror:0.036204

| Elapsed: 0:07:46.566998

| CV: 5
[0]	train-merror:0.054343	eval-merror:0.53081
[50]	train-merror:0.011958	eval-merror:0.230938
[100]	train-merror:0.006802	eval-merror:0.234595
[150]	train-merror:0.003913	eval-merror:0.233681
[200]	train-merror:0.002487	eval-merror:0.228012
[250]	train-merror:0.001865	eval-merror:0.230572
[300]	train-merror:0.001646	eval-merror:0.233864
[350]	train-merror:0.00128	eval-merror:0.236423
[400]	train-merror:0.00117	eval-merror:0.238435
[450]	train-merror:0.001024	eval-merror:0.239715

| Elapsed: 0:09:18.453025


In [64]:
print("----------------------------------")
print("n_estimators =", n_estimators)
print("min_error =", min_error)


----------------------------------
n_estimators = 498
min_error = 0.049918

4c. Testing error


In [29]:
params = {
    "learning_rate": 0.05,
    "n_estimators": n_estimators,
    "max_depth": max_depth,
    "min_child_weight": min_child_weight,
    "max_delta_step": 1,
    "gamma": gamma,
    "subsample": subsample,
    "colsample_bytree": 0.5,
    "colsample_bylevel": 0.5,
    "reg_lambda": 1,
    "reg_alpha": 0,
    "scale_pos_weight": 1,
    "objective": "multi:softmax",
    "eval_metric": "merror",
    "num_class": 6,
    "silent": 0,
    "seed": 42
}

In [30]:
params


Out[30]:
{'colsample_bylevel': 0.5,
 'colsample_bytree': 0.5,
 'eval_metric': 'merror',
 'gamma': 0.6,
 'learning_rate': 0.05,
 'max_delta_step': 1,
 'max_depth': 9,
 'min_child_weight': 9,
 'n_estimators': 498,
 'num_class': 6,
 'objective': 'multi:softmax',
 'reg_alpha': 0,
 'reg_lambda': 1,
 'scale_pos_weight': 1,
 'seed': 42,
 'silent': 0,
 'subsample': 0.5}

In [31]:
booster_train = xgb.train(params=params, dtrain=train_dmatrix, num_boost_round=params["n_estimators"])

In [32]:
y_predicted = booster_train.predict(test_dmatrix)

In [33]:
accuracy = metrics.accuracy_score(y_test, y_predicted)
cm = metrics.confusion_matrix(y_test, y_predicted)
cm = cm.astype("float") / cm.sum(axis=1)[:, np.newaxis]
cm *= 100
print(accuracy)
print(cm)


0.904979581886
[[ 97.33   0.11   0.     0.58   1.62   0.35]
 [  0.74  98.55   0.46   0.     0.25   0.01]
 [  0.8    6.67  92.22   0.03   0.12   0.15]
 [ 26.21   0.     0.    67.5    4.66   1.64]
 [  1.17   0.09   0.     5.56  90.44   2.74]
 [  5.97   0.15   0.09   0.79   0.3   92.7 ]]

In [36]:
booster_train.save_model("initial_cs_train.model")

In [35]:
booster_all = xgb.train(params=params, dtrain=all_dmatrix, num_boost_round=params["n_estimators"])

In [37]:
booster_all.save_model("initial_cs_all.model")
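
To reuse the saved models later, a fresh Booster can load the files back (a minimal sketch, using the file names saved above):

loaded_booster = xgb.Booster()
loaded_booster.load_model("initial_cs_all.model")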